K-Folds Process
Setup
Values will differ from the last submission because the train/test split is re-drawn from a fresh random shuffle of the data.
library(tidyverse)
library(dplyr)
library(caret)
library(class)
library(yardstick)
library(plotly)
library(boot)
library(pROC)
library(glmnet)
library(purrr)
library(gridExtra)
library(randomForest)
library(e1071)
#install.packages("kernlab")
#reading in the data
data <- read.csv("HaitiPixels.csv", header=TRUE ,sep=",")
data <- data %>%
mutate(BlueClass = as.factor(ifelse(Class=="Blue Tarp","Yes", "No")))
#check the levels just specified
levels(data$BlueClass)
[1] "No" "Yes"
#keep columns 2-5 (Red, Green, Blue, BlueClass)
data = data[c(2:5)]
data <- data %>% mutate(id = row_number())
#check addition
head(data$id)
[1] 1 2 3 4 5 6
#shuffle data to fairly split into test / train
shuffleddata = sample_n(data, nrow(data))
#check that it has been shuffled
head(shuffleddata$id) #different than the first six rows of the csv file
[1] 2528 4582 46254 27551 40372 45830
#remove the id column
shuffleddata = shuffleddata[c(1:4)]
#split the data into test and train for use in our upcoming models
#using 10k-row subsets for faster knn execution and to avoid file-freezing issues
samp <- 1:10000
samp2 <- 10001:20000
train<-shuffleddata[samp,]
test <- shuffleddata[samp2,]
head(train)
head(test)
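Since the split is re-drawn each run, a minimal sketch (with an arbitrary seed value) of how the shuffle could be made reproducible:
#optional: fix the RNG seed before shuffling so the train/test split is reproducible
#set.seed(2020)  #arbitrary seed; any fixed value works
#shuffleddata <- sample_n(data, nrow(data))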
KNN
#model training rules for all models
train_control <- caret::trainControl(method="cv", number=10, returnResamp='all', classProbs=TRUE, savePredictions='final')
#KNN model
system.time({
knnmod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="knn",preProcess = c("center","scale"), tuneGrid = expand.grid(k = c(1:15)))
})
user system elapsed
17.430 0.224 17.741
knnmod
k-Nearest Neighbors
10000 samples
3 predictor
2 classes: 'No', 'Yes'
Pre-processing: centered (3), scaled (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 9000, 9001, 9000, 9001, 8999, 9000, ...
Resampling results across tuning parameters:
k Accuracy Kappa
1 0.9958002 0.9300980
2 0.9961002 0.9350348
3 0.9961003 0.9359875
4 0.9964002 0.9408173
5 0.9963002 0.9389018
6 0.9962001 0.9375236
7 0.9962007 0.9380805
8 0.9959007 0.9325120
9 0.9959005 0.9314887
10 0.9960002 0.9334969
11 0.9954004 0.9234100
12 0.9955003 0.9242257
13 0.9954003 0.9228145
14 0.9953005 0.9208521
15 0.9954002 0.9223463
Accuracy was used to select the optimal model using the largest value.
The final value used for the model was k = 4.
#plot KNN model
plot(knnmod)

#set prediction, probability, and cv score variables in case needed
knnmod_pred <- predict(knnmod, test,'raw')
knnmod_prob <- predict(knnmod, test,'prob')
knnmod_scored <- cbind(test, knnmod_pred, knnmod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class, hence event_level='second')
knn_auc = knnmod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
knn_auc
#ROC curve + plot
ROC_curve<-knnmod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot <- ROC_curve %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('KNN ROC curve')
ggplotly(ROC_curve_plot)
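The ROC block above is repeated nearly verbatim for every model below, which makes it easy for titles and variable names to drift out of sync; a sketch of a helper that could replace the copy-paste, using the same yardstick/ggplot pattern as above:
#sketch: shared helper for the per-model ROC plots
plot_roc <- function(prob_df, truth_vec, title) {
  prob_df %>%
    yardstick::roc_curve(truth = truth_vec, estimate = Yes, event_level = 'second') %>%
    dplyr::mutate(one_minus_specificity = 1 - specificity) %>%
    ggplot(aes(x = one_minus_specificity, y = sensitivity)) +
    geom_line() + geom_point() +
    geom_abline(slope = 1, intercept = 0, linetype = 'dashed', color = 'blue') +
    xlab("one_minus_specificity\n(false positive rate)") +
    ggtitle(title)
}
#usage: ggplotly(plot_roc(knnmod_prob, test$BlueClass, 'KNN ROC curve'))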
#set threshold
knnmod_pred2 <- knnmod$pred %>%
#accuracy peaks around 99.7% at a threshold of .66; moving it further doesn't help
mutate(prediction = ifelse(Yes>.66, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#confusion matrix
confusionMatrix(knnmod_pred2$prediction, knnmod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9673 26
Yes 13 288
Accuracy : 0.9961
95% CI : (0.9947, 0.9972)
No Information Rate : 0.9686
P-Value [Acc > NIR] : < 2e-16
Kappa : 0.9346
Mcnemar's Test P-Value : 0.05466
Sensitivity : 0.9172
Specificity : 0.9987
Pos Pred Value : 0.9568
Neg Pred Value : 0.9973
Prevalence : 0.0314
Detection Rate : 0.0288
Detection Prevalence : 0.0301
Balanced Accuracy : 0.9579
'Positive' Class : Yes
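The .66 cutoff above was found by hand; a short sketch of automating the search over the saved CV predictions (the same pattern applies to the later models):
#sketch: scan candidate thresholds over the saved CV predictions
thresholds <- seq(0.05, 0.95, by = 0.01)
acc <- sapply(thresholds, function(t) {
  pred <- factor(ifelse(knnmod$pred$Yes > t, 'Yes', 'No'), levels = c('No', 'Yes'))
  mean(pred == knnmod$pred$obs)
})
thresholds[which.max(acc)]  #threshold with the highest CV accuracy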
LDA
#LDA model
system.time({
ldamod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="lda",preProcess = c("center","scale"), family="binomial")
})
user system elapsed
1.090 0.022 1.123
ldamod
Linear Discriminant Analysis
10000 samples
3 predictor
2 classes: 'No', 'Yes'
Pre-processing: centered (3), scaled (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 9000, 8999, 9001, 9000, 9000, 9000, ...
Resampling results:
Accuracy Kappa
0.9850008 0.7661904
#set prediction, probability, and cv score variables in case needed
ldamod_pred <- predict(ldamod, test,'raw')
ldamod_prob <- predict(ldamod, test,'prob')
ldamod_scored <- cbind(test, ldamod_pred, ldamod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class)
lda_auc = ldamod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
lda_auc
#ROC curve + plot
ROC_curve2<-ldamod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot2 <- ROC_curve2 %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('LDA ROC curve')
ggplotly(ROC_curve_plot2)
#set new threshold
ldamod_pred2 <- ldamod$pred %>%
#accuracy peaks around 98.6% at a threshold of .85; moving it further doesn't help
mutate(prediction = ifelse(Yes>.85, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#new threshold matrix
confusionMatrix(ldamod_pred2$prediction, ldamod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9625 73
Yes 61 241
Accuracy : 0.9866
95% CI : (0.9841, 0.9888)
No Information Rate : 0.9686
P-Value [Acc > NIR] : <2e-16
Kappa : 0.7756
Mcnemar's Test P-Value : 0.342
Sensitivity : 0.7675
Specificity : 0.9937
Pos Pred Value : 0.7980
Neg Pred Value : 0.9925
Prevalence : 0.0314
Detection Rate : 0.0241
Detection Prevalence : 0.0302
Balanced Accuracy : 0.8806
'Positive' Class : Yes
QDA
#QDA model
system.time({
qdamod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="qda",preProcess = c("center","scale"), family="binomial")
})
user system elapsed
1.044 0.013 1.277
qdamod
Quadratic Discriminant Analysis
10000 samples
3 predictor
2 classes: 'No', 'Yes'
Pre-processing: centered (3), scaled (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 9000, 8999, 9000, 9000, 9001, 9000, ...
Resampling results:
Accuracy Kappa
0.9924998 0.871937
#set prediction, probability, and cv score variables in case needed
qdamod_pred <- predict(qdamod, test,'raw')
qdamod_prob <- predict(qdamod, test,'prob')
qdamod_scored <- cbind(test, qdamod_pred, qdamod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class)
qda_auc = qdamod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
qda_auc
#ROC curve + plot
ROC_curve3<-qdamod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot3 <- ROC_curve3 %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('QDA ROC curve')
ggplotly(ROC_curve_plot3)
#set new threshold
qdamod_pred2 <- qdamod$pred %>%
#accuracy peaks around 99.5% at a threshold of .40; moving it further doesn't help
mutate(prediction = ifelse(Yes>.40, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#new threshold matrix
confusionMatrix(qdamod_pred2$prediction, qdamod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9641 42
Yes 45 272
Accuracy : 0.9913
95% CI : (0.9893, 0.993)
No Information Rate : 0.9686
P-Value [Acc > NIR] : <2e-16
Kappa : 0.8576
Mcnemar's Test P-Value : 0.8302
Sensitivity : 0.8662
Specificity : 0.9954
Pos Pred Value : 0.8580
Neg Pred Value : 0.9957
Prevalence : 0.0314
Detection Rate : 0.0272
Detection Prevalence : 0.0317
Balanced Accuracy : 0.9308
'Positive' Class : Yes
LR
#GLM model
system.time({
glmmod=train(BlueClass~Red+Green+Blue,data=train,trControl=train_control,method="glm",preProcess = c("center","scale"), family="binomial")
})
user system elapsed
1.633 0.060 1.705
glmmod
Generalized Linear Model
10000 samples
3 predictor
2 classes: 'No', 'Yes'
Pre-processing: centered (3), scaled (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 9000, 9001, 8999, 9000, 9000, 9000, ...
Resampling results:
Accuracy Kappa
0.9956001 0.9248834
#set prediction, probability, and cv score variables in case needed
glmmod_pred <- predict(glmmod, test,'raw')
glmmod_prob <- predict(glmmod, test,'prob')
glmmod_scored <- cbind(test, glmmod_pred, glmmod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class)
glm_auc = glmmod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
glm_auc
#ROC curve + plot
ROC_curve4<-glmmod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot4 <- ROC_curve4 %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('Logistic Regression ROC curve')
ggplotly(ROC_curve_plot4)
#set new threshold
glmmod_pred2 <- glmmod$pred %>%
#accuracy peaks around 99.6% at a threshold of .29; moving it further doesn't help
mutate(prediction = ifelse(Yes>.29, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#new threshold matrix
confusionMatrix(glmmod_pred2$prediction, glmmod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9669 23
Yes 17 291
Accuracy : 0.996
95% CI : (0.9946, 0.9971)
No Information Rate : 0.9686
P-Value [Acc > NIR] : <2e-16
Kappa : 0.9336
Mcnemar's Test P-Value : 0.4292
Sensitivity : 0.9268
Specificity : 0.9982
Pos Pred Value : 0.9448
Neg Pred Value : 0.9976
Prevalence : 0.0314
Detection Rate : 0.0291
Detection Prevalence : 0.0308
Balanced Accuracy : 0.9625
'Positive' Class : Yes
RF
#Random Forest model
#Choosing tuning parameters:
#https://discuss.analyticsvidhya.com/t/how-to-decide-no-of-ntrees-in-randomforest/6882/3
#https://rpubs.com/phamdinhkhanh/389752
#Reuse the same 10-fold CV training control as the models above.
train_control <- caret::trainControl(method="cv", number=10, returnResamp='all', classProbs=TRUE, savePredictions='final')
#https://stackoverflow.com/questions/10085806/extracting-specific-columns-from-a-data-frame
df<- train %>%
select(Red,Green,Blue)
#mtryStart defaults to sqrt(p)
#with only 3 predictors, the valid range of mtry values to try is small
(tuneRF(df, train$BlueClass, mtryStart = 5, ntreeTry = 500, stepFactor = 5, improve = 0.05,
trace = TRUE, plot = TRUE, doBest = TRUE))
invalid mtry: reset to within valid range
mtry = 5 OOB error = 0.4%
Searching left ...
mtry = 1 OOB error = 0.41%
-0.025 0.05
Searching right ...
mtry = 3 OOB error = 0.39%
0.025 0.05
Call:
randomForest(x = x, y = y, mtry = res[which.min(res[, 2]), 1])
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 3
OOB estimate of error rate: 0.41%
Confusion matrix:
No Yes class.error
No 9667 19 0.001961594
Yes 22 292 0.070063694

#mtry = 3
##------
tunegrid <- expand.grid(.mtry = 3)
modellist <- list()
#train with different ntree parameters and inspect bias/variance tradeoff
#findtrees1 <- train(BlueClass~Red+Green+Blue,
#                    data=train,
#                    method = 'rf',
#                    metric = 'Accuracy',
#                    tuneGrid = tunegrid,
#                    trControl = train_control,
#                    ntree = 50)
#findtrees1
#findtrees2 <- train(BlueClass~Red+Green+Blue,
#                    data=train,
#                    method = 'rf',
#                    metric = 'Accuracy',
#                    tuneGrid = tunegrid,
#                    trControl = train_control,
#                    ntree = 100)
#findtrees2
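A sketch of folding that commented-out search into a loop that fills the modellist defined above and compares the resampling results (the ntree values are illustrative):
#sketch: train RF with several ntree values and compare CV accuracy
for (nt in c(50, 100, 250, 500)) {
  modellist[[as.character(nt)]] <- train(BlueClass~Red+Green+Blue, data = train,
                                         method = 'rf', metric = 'Accuracy',
                                         tuneGrid = tunegrid, trControl = train_control,
                                         ntree = nt)
}
summary(resamples(modellist))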
system.time({
RF <- train(BlueClass~Red+Green+Blue,
data=train,
method = 'rf',
metric = 'Accuracy',
tuneGrid = tunegrid,
trControl = train_control,
ntree = 500)
})
user system elapsed
16.611 1.681 18.652
RF
Random Forest
10000 samples
3 predictor
2 classes: 'No', 'Yes'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 9001, 9000, 8999, 9001, 9000, 8999, ...
Resampling results:
Accuracy Kappa
0.9959998 0.9337306
Tuning parameter 'mtry' was held constant at a value of 3
#set prediction, probability, and cv score variables in case needed
rfmod_pred <- predict(RF, test,'raw')
rfmod_prob <- predict(RF, test,'prob')
rfmod_scored <- cbind(test, rfmod_pred, rfmod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class)
rf_auc = rfmod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
rf_auc
#ROC curve + plot
ROC_curve5<-rfmod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot5 <- ROC_curve5 %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('Random Forest ROC curve')
ggplotly(ROC_curve_plot5)
#set new threshold
rfmod_pred2 <- RF$pred %>%
#accuracy peaks around 99.7% at the default .50 threshold; moving it doesn't help
mutate(prediction = ifelse(Yes>.50, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#new threshold matrix
confusionMatrix(rfmod_pred2$prediction, rfmod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9669 23
Yes 17 291
Accuracy : 0.996
95% CI : (0.9946, 0.9971)
No Information Rate : 0.9686
P-Value [Acc > NIR] : <2e-16
Kappa : 0.9336
Mcnemar's Test P-Value : 0.4292
Sensitivity : 0.9268
Specificity : 0.9982
Pos Pred Value : 0.9448
Neg Pred Value : 0.9976
Prevalence : 0.0314
Detection Rate : 0.0291
Detection Prevalence : 0.0308
Balanced Accuracy : 0.9625
'Positive' Class : Yes
SVM
#Choosing tuning parameters:
#Linear
set.seed(1)
tune.out.linear=tune(svm,BlueClass~Red+Green+Blue,data=train,kernel="linear",ranges=list(cost=c(0.001, 0.01, 0.1, 1,5,10,100)))
#Radial
tune.out.radial=tune(svm, BlueClass~Red+Green+Blue,data=train, kernel="radial", ranges=list(cost=c(0.1,1,10,100,1000),gamma=c(0.5,1,2,3,4)))
#lowest error is radial cost 10 gamma 1
#Poly
tune.out.poly=tune(svm,BlueClass~Red+Green+Blue,data=train, kernel="polynomial", ranges=list(cost=c(0.1,1,10,100,1000),degree=c(1,2,3,4,5)))
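The tune() objects can be queried directly rather than eyeballing their summaries; a short sketch, shown for the radial kernel, which won:
#pull the winning parameters and CV error straight from the tune object
summary(tune.out.radial)
tune.out.radial$best.parameters   #cost = 10, gamma = 1 per the note above
tune.out.radial$best.performance  #lowest cross-validated error found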
##------
system.time({
svmmod <- train(BlueClass~Red+Green+Blue,
data=train,
method = 'svmRadial',
metric = 'Accuracy',
trControl = train_control,
#note: caret's svmRadial method tunes kernlab's sigma and C itself;
#e1071-style cost/gamma arguments do not apply to this method
preProcess = c("center","scale")
)
})
#Set prediction, probability, and cv score variables in case needed
svmmod_pred <- predict(svmmod, test,'raw')
svmmod_prob <- predict(svmmod, test,'prob')
svmmod_scored <- cbind(test, svmmod_pred, svmmod_prob)
#AUC/ROC
#area under the curve ('Yes' is the positive class)
svmmod_auc = svmmod_prob %>%
yardstick::roc_auc(truth=test$BlueClass, Yes, event_level='second')
svmmod_auc
#ROC curve + plot
ROC_curve6<-svmmod_prob %>%
yardstick::roc_curve(truth=test$BlueClass,estimate=Yes,event_level='second') %>%
dplyr::mutate(one_minus_specificity = 1-specificity)
ROC_curve_plot6 <- ROC_curve6 %>%
ggplot(aes(x=one_minus_specificity,y=sensitivity))+
geom_line() + geom_point() +
geom_abline(slope = 1,intercept = 0, linetype='dashed',color='blue')+
xlab("one_minus_specificity\n(false positive rate)")+
ggtitle('SVM ROC curve')
ggplotly(ROC_curve_plot6)
#set new threshold
svmmod_pred2 <- svmmod$pred %>%
#accuracy peaks around 99.7% at a threshold of .52; moving it further doesn't help
mutate(prediction = ifelse(Yes>.52, 'Yes', 'No')) %>%
mutate(prediction = factor(prediction, levels=c('No','Yes')))
#new threshold matrix
confusionMatrix(svmmod_pred2$prediction, svmmod_pred2$obs, positive="Yes")
Confusion Matrix and Statistics
Reference
Prediction No Yes
No 9673 28
Yes 13 286
Accuracy : 0.9959
95% CI : (0.9944, 0.9971)
No Information Rate : 0.9686
P-Value [Acc > NIR] : < 2e-16
Kappa : 0.931
Mcnemar's Test P-Value : 0.02878
Sensitivity : 0.9108
Specificity : 0.9987
Pos Pred Value : 0.9565
Neg Pred Value : 0.9971
Prevalence : 0.0314
Detection Rate : 0.0286
Detection Prevalence : 0.0299
Balanced Accuracy : 0.9547
'Positive' Class : Yes
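Before the discussion, a sketch of pulling the hold-out AUCs computed above into one table for side-by-side comparison:
#sketch: gather the hold-out AUC tibbles into one comparison table
dplyr::bind_rows(knn = knn_auc, lda = lda_auc, qda = qda_auc,
                 glm = glm_auc, rf = rf_auc, svm = svmmod_auc,
                 .id = "model") %>%
  dplyr::arrange(dplyr::desc(.estimate))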
Tuning parameter selection process
- Interpretation of your chosen tuning parameter values:
- Ntree = 500 indicates that 500 trees were grown. This number needs to be large enough to reduce variance and to let every input row be predicted at least a few times; past that point, extra trees mostly add computation rather than accuracy. Mtry is the number of variables randomly sampled at each split: when mtry = p the forest essentially reduces to bagging, while mtry = 1 picks a single random variable at each split, so it is worth trying a few values between 2 and p (the tuneRF search here settled on mtry = 3). For the SVM, the radial kernel is K(u, v) = exp(-gamma * ||u - v||^2), so gamma controls how far the influence of a single training example reaches (smaller gamma means farther reach). A cost of 10 means we are "paying" a high price for misclassified training points, pushing the fit toward higher training accuracy.
- Explanation of how your chosen tuning parameter values were chosen:
- I tried to make this selection process as programmatic as possible; there was a lot of learning involved, and I'm sure some of the code could still be improved, but I was glad to find the resources that I did. For mtry I used the tuneRF() function, which accepts an initial mtry value and returns the out-of-bag (OOB) error for that value and a few surrounding ones. I chose mtry = 3 because it produced the lowest OOB error, then moved on to choosing ntree: I fit several RF models with mtry = 3 and different ntree values and again looked for the highest accuracy. ntree = 500 was the winner, and it also appears to be a very commonly used value for that parameter. For the SVM parameters I used our class lab as a guide and ran the tune() function for linear, radial, and polynomial kernels over several cost, gamma, and degree values respectively. tune() returns a best performer, which was the radial kernel with cost = 10 and gamma = 1. When running caret's train() function, sigma = 8.691262 and C = 1 were returned as the values producing the highest accuracy, so those were the values ultimately used for those parameters.
Conclusions
- A discussion of the best performing algorithm(s) in the cross-validation and hold-out data
- The best performing algorithms in cross-validation were GLM and QDA. Both train in under 1.5 seconds of user time (up to roughly 15 seconds faster than some of the others) while keeping cross-validated accuracy above 99.2%. Scoring the hold-out data generally ran slowly, which could be due to the size of the dataset, but given the consistently high accuracy and ease of use of these two models I would be even more inclined to advocate for them in a hold-out setting.
- A recommendation and rationale regarding which algorithm to use for detection of blue tarps
- For the detection of blue tarps I'd recommend the radial-kernel SVM. It's valuable to be able to tune the parameters to your data and to however you define accuracy: with this pixel data in particular, you could lower the threshold for declaring a blue tarp, accepting detections beyond the literal tarp boundaries in order to avoid overlooking a person in need. The reason I became partial to this model in the context of a natural disaster is that it gives you a starting point for additional tuning. Accuracy and automation are two things I assume are extremely valuable in the wake of a natural disaster.
- Additional thoughts and questions:
- I've done some research to learn more about how average RGB thresholds are set to classify a "green" vegetation area or a "red" soil area. From a paper titled "Geospatial Disaster Response during the Haiti Earthquake: A Case Study Spanning Airborne Deployment, Data Collection, Transfer, Processing, and Dissemination," I understand that a Mahalanobis distance classifier was trained on a collection of known blue-tarp pixels, the RGB values of the pixels in the photos were averaged, and that is how other pixels are determined to be blue tarp or not. My issue is largely that the average of those three values is not exclusive to a color. I read that "RGB (Red, Green, Blue) are 8 bit each. The range for each individual colour is 0-255 (as 2^8 = 256 possibilities). The combination range is 256 x 256 x 256. By dividing by 255, the 0-255 range can be described with a 0.0-1.0 range where 0.0 means 0 (0x00) and 1.0 means 255 (0xFF)." So how could the average of these three values indicate that something is blue rather than red, when 'true' red (255,0,0) has the same average as 'true' green (0,255,0) or 'true' blue (0,0,255)? It seems more logical to require a lower bound on the blue channel, plus a condition that blue be, say, at least 1.2x the other two channels. There are also accessibility studies on exactly what shade begins to be unseeable by someone who is color-blind; how do they determine their thresholds? That will be my next rabbit hole.
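To make that concrete, a minimal sketch of the Mahalanobis-distance idea on this dataset, using R's built-in mahalanobis(); the chi-squared cutoff is a hypothetical choice for illustration, not the paper's:
#sketch: distance of each pixel's RGB vector from the blue-tarp class center,
#using the class covariance (not a plain channel average)
tarp <- train %>% filter(BlueClass == "Yes") %>% select(Red, Green, Blue)
mu <- colMeans(tarp)
S <- cov(tarp)
d2 <- mahalanobis(test[, c("Red", "Green", "Blue")], center = mu, cov = S)
#flag pixels within a cutoff distance as tarp (cutoff here is hypothetical)
pred_tarp <- d2 < qchisq(0.99, df = 3)
table(pred_tarp, test$BlueClass)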